# 160712 merge the sequence hits of all 80 replicates into one hit table

# All relative output paths below (CSV, PDF) resolve against this folder.
setwd("~/Documents/UNI_und_VORLESUNGEN/11 phd projects/1 Meta SCHMALNAU/2 HiSeq biomass 160707/7 OTUs")

# Full paths of the tab-separated hit tables that are merged further down.
files <- list.files(
  "/Volumes/NGS 2TB/160707_Biomass/9 Blast sub",
  full.names = TRUE
)

# Seed table for the repeated outer merges: a single placeholder row whose ID
# is the literal string "NULL" (it is removed again once all files are merged).
# An earlier variant seeded the IDs with paste("OTU_", 1:854, sep="").
tab <- data.frame(ID = "NULL", stringsAsFactors = FALSE)

# Merge the per-file OTU abundances into `tab`, adding one column per file.
# The script is written for 80 hit tables; check that up front so a short or
# unmounted input folder produces a clear error rather than a failed read.
n_replicates <- 80
if (length(files) < n_replicates) {
  stop(
    "expected at least ", n_replicates, " hit tables but found ",
    length(files),
    call. = FALSE
  )
}

# Column labels assigned to the 12 columns of each hit table.
blast_cols <- c(
  "query", "otu", "ident", "length", "mism", "gap",
  "qstart", "qend", "target_s", "target_e", "e.value", "bitscore"
)

for (i in seq_len(n_replicates)) {
  data <- read.csv(files[i], sep = "\t", header = FALSE, stringsAsFactors = FALSE)
  names(data) <- blast_cols

  # Columns 11 and 12 (e.value, bitscore) are not used below.
  data <- data[, c(-11, -12)]

  # abund:  the number between "size=" and the trailing ";" of the query label.
  # otu_no: the part of the target label in front of ";size".
  data <- cbind(
    data,
    "abund" = as.numeric(sub(".*size=(.*);", "\\1", data$query)),
    "otu_no" = sub("(.*);size.*", "\\1", data$otu),
    stringsAsFactors = FALSE
  )

  # Total abundance per OTU in this file (result columns: Group.1, x).
  temp <- aggregate(data$abund, by = list(data$otu_no), FUN = "sum")

  # Outer merge keeps OTUs that are missing from either side (filled with NA).
  tab <- merge(tab, temp, by.x = "ID", by.y = "Group.1", all = TRUE, sort = TRUE)

  # Name the freshly added column after the input file (folder and ".txt"
  # stripped); the dot is escaped so it only matches a literal ".".
  names(tab)[i + 1] <- sub(".*9 Blast sub/(.*)\\.txt", "\\1", files[i])

  print(i) # progress indicator
}

head(tab)

# Remove the "NULL" placeholder row that seeded the merges. It is dropped by
# value rather than by position, so the result does not depend on where the
# placeholder ended up after sorting.
tab <- tab[!(tab$ID %in% "NULL"), ]

# OTUs without hits in a file came out of the outer merge as NA -> count 0.
tab[is.na(tab)] <- 0

# Order rows by the numeric part of the "OTU_<n>" identifier.
mrew <- as.numeric(gsub("OTU_(.*)", "\\1", tab$ID))
tab <- tab[order(mrew), ]


head(tab)


write.csv(file = "F) hit_tab+.csv", tab, row.names = FALSE)






# quick plot: reload the merged hit table, pick and reorder 40 sample columns,
# keep the leading rows and convert the counts to percent of each column total.
tab <- read.csv("F) hit_tab+.csv")

nrow(tab)

# Sample positions (1-based, before the +1 shift for the ID column) in the
# order in which they should appear in the plot.
sample_order <- c(
  seq(1, 10, 2), seq(21, 30, 2),
  seq(2, 10, 2), seq(22, 30, 2),
  seq(11, 20, 2), seq(31, 40, 2),
  seq(12, 20, 2), seq(32, 40, 2)
)
tab <- tab[, c(1, sample_order + 1)]

as.data.frame(names(tab))

# Subset to the first 300 rows. head() never pads with all-NA rows when the
# table is shorter, which would otherwise turn every column total into NA.
tab <- head(tab, 300)

# calculate %ages (relative to the column totals of the retained rows)
sample_cols <- seq_len(ncol(tab))[-1]
col_totals_g <- colSums(tab[, sample_cols])

tab[sample_cols] <- Map(
  function(counts, total) counts / total * 100,
  tab[sample_cols],
  col_totals_g
)
colSums(tab[, sample_cols]) # check if 100%


# temp2 keeps the percentages themselves; meep holds an abundance class per
# cell, one class per decade of percent:
#   4: >= 10 | 3: [1, 10) | 2: [0.1, 1) | 1: [0.01, 0.1) | 0: < 0.01
temp2 <- tab[, 2:41]
meep <- temp2

# The masks are evaluated on the untouched percentages in temp2, so the class
# values written into meep can never be re-classified by a later line.
meep[temp2 >= 10] <- 4
meep[temp2 >= 1 & temp2 < 10] <- 3
meep[temp2 >= 0.1 & temp2 < 1] <- 2
meep[temp2 >= 0.01 & temp2 < 0.1] <- 1
meep[temp2 < 0.01] <- 0



head(meep)



# Heat map of the abundance classes, written to "F) plot.pdf".
# Every cell (row = table row, column = sample) is drawn as two rectangles:
#   1. a full-size background in the colour of its class, and
#   2. a bar in the colour of the next class whose width is the percentage
#      rescaled by the class' power of ten (percent / 10^(class - 2)).
# mycol[1] is used for class 0; the bar of class c uses mycol[c + 2].
mycol <- c("white", "#E2E2F2", "#9999FF", "#3232FF", "#0000A2", "Black", "Red")

pdf("F) plot.pdf", width = 12, height = 66)

par(mar = c(0, 0, 1.5, 0) + 0.1)

n_rows <- nrow(meep)
n_cols <- ncol(meep)

# Empty canvas; extra room on the right for the row labels.
plot(
  1,
  type = "n", axes = FALSE, xlab = "", ylab = "",
  ylim = c(-2, n_rows), xlim = c(-5, n_cols + 7.5)
)

if (n_rows > 0 && n_cols > 0) {
  class_mat <- as.matrix(meep)
  cell_row <- as.vector(row(class_mat))
  cell_col <- as.vector(col(class_mat))
  cell_class <- as.vector(class_mat)
  cell_pct <- as.vector(as.matrix(temp2))

  # rect() is vectorised: one call draws all backgrounds, one all bars.
  rect(
    cell_col - 0.5, cell_row - 0.5, cell_col + 0.5, cell_row + 0.5,
    col = mycol[cell_class + 1], border = NA, lwd = 0
  )
  rect(
    cell_col - 0.5, cell_row - 0.5,
    cell_col - 0.5 + cell_pct / 10^(cell_class - 2), cell_row + 0.5,
    col = mycol[cell_class + 2], border = NA
  )

  # Row labels (IDs) to the right of the grid.
  text(n_cols + 0.6, seq_len(n_rows), tab$ID, cex = 0.5, adj = c(0, NA))

  # Column labels (sample names without the "LibTier15_" prefix) below it.
  text(
    seq_len(n_cols), -5,
    gsub("LibTier15_", "", names(tab)[-1]),
    cex = 0.5, srt = 90
  )
}

#axis(1, at=2:41, labels=gsub("LibTier15_", "", names(tab)[-1]), cex=0.3)

dev.off()




#tab[1:300, c(1, 2,4,6,8,10)] 



# save only R1 / F1


#tab <- read.csv("hit_tab+.csv")

#temp <- tab[, c(1, grep("_BF1._BR1.", names(tab)), grep("_BR1._BF1.", names(tab)))]

#write.table(temp[1:644,], sep="\t", file="OTU_BF1+BR1.csv")






